# Implementation of the ed2k hash algorithm.

# @author Kovensky (mailto:diogomfranco@gmail.com)

require 'openssl'
require 'digest'

# {Digest::Class Digest} subclass that calculates an ed2k hash.
#
# The implemented algorithm is as described on http://wiki.anidb.net/w/Ed2k,
# using the "red code" method (appending null to hashlist).
#
# Uses {OpenSSL::Digest OpenSSL::Digest::MD4} internally for the MD4 hashing.
#
# @note Due to ed2k's use of 9500KB chunks, this object can be up to around
#       10MB large in memory, not counting OpenSSL::Digest::MD4's state.
class Digest::ED2k < Digest::Class
    # The version
    VERSION = '1.0.0'

    # Chunk size of the ed2k hash, 9500KB (in bytes).
    CHUNK_SIZE = 9728000

    # Calculates and returns the {#digest} on data.
    #
    # @param [String] data the data to hash
    # @return [String] the raw digest
    def self.digest(data)
        new(data).digest
    end

    # Calculates and returns the {#hexdigest} on data.
    #
    # @param [String] data the data to hash
    # @return [String] the digest as a hexadecimal string
    def self.hexdigest(data)
        new(data).hexdigest
    end

    # The same as calling {#io}(io) on a new object.
    #
    # @param [IO] io the {IO} to be read
    # @return [Digest::ED2k] the new, not yet finalized, object
    def self.io(io)
        new.io(io)
    end

    # The same as calling {#file}(path) on a new object.
    #
    # @param [String] path path to a file that will be read
    # @return [Digest::ED2k] the new, not yet finalized, object
    def self.file(path)
        new.file(path)
    end

    # Creates a reset object.
    #
    # NOTE(review): OpenSSL::Digest::MD4.new presumably raises when the linked
    # OpenSSL build does not provide MD4 (e.g. OpenSSL 3 without the legacy
    # provider) -- confirm on the target platform.
    #
    # @param initial_chunk optionally appends data to the new object
    def initialize(initial_chunk = nil)
        @md4 = OpenSSL::Digest::MD4.new
        reset
        self << initial_chunk if initial_chunk
    end

    # Gives copies made through dup/clone their own MD4 context and buffer,
    # so that updating or finalizing a copy never alters the original.
    #
    # @param source the object being copied
    def initialize_copy(source)
        super
        @md4 = @md4.dup
        @buf = @buf.dup if @buf
        self
    end

    # Reads the contents of `io`, 9500KB at a time,
    # until EOF, and adds them to the digest.
    #
    # @param [IO] io the {IO} to be read
    # @return self for convenient chaining.
    def io(io)
        # One scratch buffer is reused for every read; #update copies its
        # bytes, so overwriting it on the next iteration is safe.
        scratch = ''.b
        self << scratch while io.read(CHUNK_SIZE, scratch)
        self
    end

    # Opens the file pointed by path and calls {#io} with it.
    #
    # The file is opened in binary mode so that no newline or encoding
    # translation can change the bytes being hashed.
    #
    # @param [String] path path to a file that will be read
    # @return self for convenient chaining.
    def file(path)
        File.open(path, 'rb') { |f| io(f) }
    end

    # Resets the state; effectively the same as constructing a new object.
    # @return self for convenient chaining.
    def reset
        @md4.reset
        # The buffer is binary: chunk boundaries are defined in bytes.
        @buf = ''.b
        @rounds = 0
        @finalized = false
        self
    end

    # Append `data` to the digest. Will raise an {ArgumentError}
    # if the object has been {#finalize}d before.
    #
    # Every 9500KB of accumulated data will be hashed as per the ed2k algorithm.
    # The data is always treated as raw bytes, whatever its encoding.
    #
    # @param [String] data the string to be appended to the digest
    # @return self for convenient chaining.
    # @raise [ArgumentError] if the object has already been finalized
    # @raise [TypeError] if `data` cannot be implicitly converted to a String
    def update(data)
        if @finalized
            raise ArgumentError, "Can't add to an ed2k hash after finalizing. Call reset if you want to calculate a new hash."
        end
        bytes = String.try_convert(data)
        raise TypeError, "no implicit conversion of #{data.class} into String" if bytes.nil?
        # Append in place (no reallocation of the whole buffer per call) and
        # as binary, so that chunks of different encodings can be mixed.
        @buf << bytes.b
        _sync
        self
    end
    alias << update

    # {#finalize}s the digest and returns it.
    #
    # @note Due to how ed2k hashes work, once the digest has been obtained,
    #       the object must be {#reset} before being reused. The {#digest!} family
    #       of methods automatically resets the object after being called.
    #
    # @param str if present and non-nil, will cause digest to reset self,
    #            update it with `str` and return the new digest.
    # @return [String] the raw digest
    def digest(str = nil)
        if str
            reset
            self << str
            digest
        else
            finalize
            @md4.digest
        end
    end

    # (see #digest)
    # @note calls {#reset} on self after obtaining digest
    def digest!
        ret = digest
        reset
        ret
    end

    # {include:#digest}
    # @note (see #digest)
    # @param (see #digest)
    # @return [String] the digest as a hexadecimal string
    def hexdigest(str = nil)
        if str
            reset
            self << str
            hexdigest
        else
            finalize
            @md4.hexdigest
        end
    end

    # {include:#hexdigest}
    # @param (see #hexdigest)
    # @return (see #hexdigest)
    # @note (see #digest!)
    def hexdigest!
        ret = hexdigest
        reset
        ret
    end

    # Finalizes the digest and prevents any new data from being added
    # to it. This is automatically called by the {#digest} family of methods.
    # The only way to unlock the object is to {#reset} it.
    #
    # @return self for convenient chaining.
    def finalize
        unless @finalized
            if @rounds > 0
                # At least one full chunk was hashed: the result is the MD4 of
                # the list of chunk hashes. The remainder is hashed even when
                # it is empty, which is the "red code" behaviour.
                @md4 << OpenSSL::Digest::MD4.digest(@buf)
            else
                # Less than one chunk in total: the result is the plain MD4
                # of the data.
                @md4.reset
                @md4 << @buf
            end
            @buf = nil
            @finalized = true
        end
        self
    end

    # Override for {Object#inspect Object's inspect}
    #
    # @return [String] a placeholder while unfinalized, otherwise a string
    #         containing the hexadecimal digest
    def inspect
        return "#<ed2k unfinalized>" unless @finalized
        "#<ed2k hash=\"#{hexdigest}\">"
    end

    private

    # When the internal buffer holds at least {CHUNK_SIZE} bytes, feed the MD4
    # of each complete CHUNK_SIZE chunk to the hashing function and keep only
    # the remainder (fewer than CHUNK_SIZE bytes) in the buffer.
    def _sync
        return if @buf.bytesize < CHUNK_SIZE

        # Walk the buffer with an offset instead of re-slicing it after every
        # chunk, so a very large append is only copied once.
        offset = 0
        while @buf.bytesize - offset >= CHUNK_SIZE
            @md4 << OpenSSL::Digest::MD4.digest(@buf.byteslice(offset, CHUNK_SIZE))
            offset += CHUNK_SIZE
            @rounds += 1
        end
        @buf = @buf.byteslice(offset, @buf.bytesize - offset)
    end
end
